#-------------------------------------------------------------------------------
#
# Electoral predictors - fit margin last poll model
#
# Author: Sina Chen
#
#-------------------------------------------------------------------------------


# Libraries ---------------------------------------------------------------

{
  library(tidyverse)
  library(rstan)
  rstan_options(auto_write = TRUE)
  options(mc.cores = parallel::detectCores())
}


# Data --------------------------------------------------------------------

polls <- readRDS("data/us_senate_polls1990_2022_final.RDS")


# Preparation -------------------------------------------------------------

polls <- polls %>%
  filter(dte < 101) %>%                              # exclude polls conducted more than 100 days before election day
  group_by(state, cycle) %>%                       # compute election groups  
  mutate(n_poll = n(),
         last_poll = max(end_date)) %>% 
  ungroup() %>% 
  filter(n_poll >= 5) %>%                          # exclude elections with less than 5 polls
  mutate(state_year = paste0(state, cycle),
         cycle = as.integer(cycle),
         state_year_int = as.integer(as.factor(state_year)),
         t_sc = as.numeric(dte)/max(as.numeric(dte)),  # scale days to election 0-1
         last_poll_margin = if_else(end_date == last_poll, pct2_rep-pct2_dem, NA_real_))
  
# election-level data 
vote_sy <- polls %>%
  group_by(state_year, cycle,  state, vote2_rep) %>%
  summarise(last_poll_margin = mean(last_poll_margin, na.rm = T)) %>%
  ungroup()

# Stan data 
stan_dat <- list(
  N = nrow(polls),                             # number of polls
  SY = length(unique(polls$state_year)),       # number of elections

  poll = polls$pct2_rep,                       # two-party poll share
  vote = vote_sy$vote2_rep,                    # two-party vote share
  
  feature = vote_sy$last_poll_margin,          # election-level feature
  t = polls$t_sc,                              # scaled days to election
  
  sample_size = polls$sample_size * 
    (polls$vote_rep + polls$vote_dem),         # sample size adjusted for Rep. & Dem. poll share
  
  sy_id = polls$state_year_int                 # election id
  
)

# check Stan data
sapply(stan_dat, length)
sapply(stan_dat, range)


# Fit Stan model ----------------------------------------------------------

resStan <- stan(file = "code/fit_stan/stan_ml/ml_senate_bias_context_cont.stan", 
                data = stan_dat,
                chains = 4, iter = 5000,
                control = list(adapt_delta = 0.95, max_treedepth = 12)
) 

# save simulation results
saveRDS(resStan, 'code/fit_stan/resStan_us_senate_context1990_2022_last.RDS')
